************************************************************
************************************************************
**														  **
**	Tanzania - Assembling 3 panel rounds				  **
**										   				  **
**										   				  **
************************************************************
************************************************************

clear
version 14
set more off
cap log close

gl OUT 	"${LOCDRIVE}\Results"
gl TEMP	"${LOCDRIVE}\Tempdata"
gl FIG "${LOCDRIVE}\Figures"

loc r1_hh hhid
loc r1_weight hh_weight
loc r1_indiv sbmemno
loc r2_hh y2_hhid
loc r2_weight y2_weight
loc r2_indiv indidy2
loc r3_hh y3_hhid
loc r3_weight y3_weight
loc r3_indiv indidy3

loc ea ea
loc strata strataid
loc admin0 macroregion
loc admin1 region 		
loc admin2 district  	
loc admin3 ward

/*loc f1 Staple
loc f2 PNFFV
loc f3 ASF
loc f4 OF
*/

loc gooddisagglist $gooddisagglist 
loc servicedisagglist $servicedisagglist

log using "${TEMP}\PanelAssembly_Tanzania", replace


**************************
** Wave 2 to Wave 1 **
**************************
use "${DATADRIVE}\LSMS-ISA\2010-11\HH_SEC_B.dta", clear
sort `r2_hh' `r2_indiv'
rename hhid_2008 `r1_hh'
encode `r1_hh', gen(`r1_hh'_c)
bysort `r2_hh': egen rank=rank(`r1_hh'_c), track
bysort `r2_hh': egen sd=sd(rank)
count
loc allobs = r(N)
count if sd==. | sd==0
loc obssamehh = r(N)
if `allobs' != `obssamehh' {
	display "Problem with at least one y2 hh mapping onto multiple y1 hhs!"
	end
	}
collapse (firstnm) `r1_hh', by(`r2_hh')
sort `r2_hh' `r1_hh' 

save "${TEMP}\WaveLink_R2_R1.dta", replace


*************************
** Wave 3 to wave 2 **
*************************

use "${DATADRIVE}\LSMS-ISA\2012-13\HH_SEC_A.dta", clear

rename hh_a09 `r2_hh'

isid `r3_hh'

keep `r3_hh' `r2_hh'
sort `r3_hh' `r2_hh' 

save "${TEMP}\WaveLink_R3_R2.dta", replace


***********************
** Assemble Panel **
***********************

** First panel -- start with R3 and trace all back. Some R2 and R1 obs will be repeated. Use R3 R2 R1 round as identifier.

use "${TEMP}\Tanzania_HH_R3.dta", clear
sort `r3_hh'
merge 1:1 `r3_hh' using "${TEMP}\WaveLink_R3_R2.dta", nogen keep(1 3)
sort `r3_hh' `r2_hh'
merge m:1 `r2_hh' using "${TEMP}\WaveLink_R2_R1.dta", nogen keep(1 3)
sort `r3_hh' `r2_hh' `r1_hh'
isid `r3_hh'
g round=3
tempfile round3
save `round3', replace

use "${TEMP}\Tanzania_HH_R2.dta", clear
sort `r2_hh'
merge 1:1 `r2_hh' using "${TEMP}\WaveLink_R2_R1.dta", nogen keep(1 3)
sort `r2_hh'
merge 1:m `r2_hh' using "${TEMP}\WaveLink_R3_R2.dta",  keep(3) /*148 hhs do not appear in R3 and are dropped*/
sort `r3_hh' `r2_hh' `r1_hh'
isid `r3_hh' `r2_hh'
g round=2
tempfile round2
save `round2', replace

use "${TEMP}\Tanzania_HH_R1.dta", clear
sort `r1_hh'
merge 1:m `r1_hh' using "${TEMP}\WaveLink_R2_R1.dta", nogen keep(3)
sort `r2_hh'
merge 1:m `r2_hh' using "${TEMP}\WaveLink_R3_R2.dta", nogen keep(3) /*148 hhs do not appear in R3 and are dropped*/
sort `r3_hh' `r2_hh' `r1_hh'
isid `r3_hh' `r2_hh' `r1_hh'
g round=1
tempfile round1
save `round1', replace

use `round3', clear
append using `round2'
append using `round1'
sort `r3_hh' `r2_hh' `r1_hh' round
isid `r3_hh' `r2_hh' `r1_hh' round

bysort `r3_hh': egen wt_r3bwd=median(`r3_weight')	

save "${TEMP}\Tanzania_Panel_r3bwd.dta", replace

keep `r1_hh' `r2_hh' `r3_hh' round `ea' `admin3' `admin2' `admin1' `admin0' `r1_weight'

tempfile sample_r3bwd
save `sample_r3bwd', replace


** Second panel -- start with R1 and trace forward the main household.

use "${TEMP}\Tanzania_HH_R2.dta", clear
sort `r2_hh'
merge 1:1 `r2_hh' using "${TEMP}\WaveLink_R2_R1.dta", nogen keep(3)
g count=1
bysort `r1_hh': g r2id = sum(count)
drop count
keep if r2id==1
isid `r1_hh'
g round=2
tempfile round2_nodup
save `round2_nodup', replace

use "${TEMP}\Tanzania_HH_R3.dta", clear
sort `r3_hh'
merge 1:1 `r3_hh' using "${TEMP}\WaveLink_R3_R2.dta", nogen keep(3)
g count=1
bysort `r2_hh': g r3id = sum(count)
drop count
keep if r3id==1
isid `r2_hh'
sort `r2_hh'
merge 1:1 `r2_hh' using "${TEMP}\WaveLink_R2_R1.dta", nogen keep(3)
g count=1
bysort `r1_hh': g r2id = sum(count)
drop count
keep if r2id==1
isid `r1_hh'
g round=3
tempfile round3_nodup
save `round3_nodup', replace

use "${TEMP}\Tanzania_HH_R1.dta", clear
sort `r1_hh'
g round=1
append using `round2_nodup'
append using `round3_nodup'

sort `r1_hh' round
isid `r1_hh' round

forvalues r = 1/3 {
	g count = round==`r'
	bysort hhid: egen hasround`r' = sum(count)
	drop count
	}
tab hasround1
tab hasround2
tab hasround3

drop if hasround2==0 | hasround3==0
tab round

bysort `r1_hh': egen wt_r1fwd=median(`r1_weight')

save "${TEMP}\Tanzania_Panel_r1fwd.dta", replace

keep `r1_hh' `r2_hh' `r3_hh' round  `ea' `admin3' `admin2' `admin1' `admin0' `r1_weight'

tempfile sample_r1fwd
save `sample_r1fwd', replace

save "${TEMP}\Sample_r1fwd.dta", replace



***** THIS IS FOR 3 GOODS 

use "${TEMP}\Tanzania_Panel_r1fwd.dta", clear

g exp_tot = hhexp

forvalues i = 1/4 {
	g exp_food`i' = foodcons`i' /*this includes all consumed food in home including that grown by the household*/
	}
g exp_food = foodcons 


* Alt exp agg for SL prices
egen exp_good_alc = rowtotal(exp_good_alc_secLM)
egen exp_good_cons = rowtotal(exp_good_cons_secLM)
egen exp_good_util = rowtotal(exp_good_util_secLM) /*this is fuel but also funiture - best price data we have*/
egen exp_service_util = rowtotal(exp_service_util_secLM)
egen exp_service_comms = rowtotal(exp_service_comms_secLM)
egen exp_service_recr = rowtotal(exp_service_recr_secLM)
egen exp_service_transport = rowtotal(exp_service_transport_secLM)
replace exp_service_food = exp_service_food + exp_service_food_secLM
egen exp_service_housing = rowtotal(exp_service_housing_secLM exp_service_rent)
egen exp_service_misc = rowtotal(exp_service_misc_secLM)

loc goodvarlist ""
foreach var in `gooddisagglist' {
	loc goodvarlist `goodvarlist' exp_`var'
	}
loc servicevarlist ""  
foreach var in `servicedisagglist' {
	loc servicevarlist `servicevarlist' exp_`var'
	}

display "`goodvarlist'"
display "`servicevarlist'"


** GOOD SERVICE EXP AGGREGATES

egen exp_good = rowtotal(`goodvarlist')
egen exp_service = rowtotal(`servicevarlist')

egen exp_gsfot = rowtotal(exp_food exp_good exp_service)

g w_good = exp_good / exp_gsfot
g w_service = exp_service / exp_gsfot
forvalues i = 1/4 {
	g w_food`i' = exp_food`i' / exp_gsfot
	}
g w_food = exp_food / exp_gsfot


** Detailed exp categories 

foreach expcat in `gooddisagglist' `servicedisagglist' {
	g wd_`expcat' = exp_`expcat' / exp_gsfot
	la var wd_`expcat' "`expcat' exp (share tot)"
	}
		
* budget shares:
la var w_service "Services expenditures (share tot)"
la var w_good "Goods expenditures (share tot)"
forvalues i = 1/4 {
	la var w_food`i' "`f`i'' expenditures (share tot)"
	}

la var w_food "Food expenditures (share tot)"


* expenditures by category:
la var exp_service "Services expenditures (LC)"
g exp_service_l = log(exp_service)
la var exp_service_l "Services expenditures (log LC)"	
la var exp_good "Goods expenditures (LC)"
g exp_good_l = log(exp_good)
la var exp_good_l "Goods expenditures (log LC)"

la var exp_food "Food expenditures (LC)"
g exp_food_l = log(exp_food)
la var exp_food_l "Food expenditures (log LC)"	
forvalues i = 1/4 {
	la var exp_food`i' "`f`i'' expenditures (LC)"
	g exp_food`i'_l = log(exp_food`i')
	la var exp_food`i'_l "`f`i'' expenditures (log LC)"
	}

	
* total expenditures:
la var exp_tot "Total expenditures (LC)"
la var exp_gsfot "Total expenditures (LC)"
g exp_tot_l = log(exp_gsfot)
la var exp_tot_l "Total expenditures (log LC)"


* demographic controls: 
la var urban "Urban residing hh"
label define urbanrural 0 "Rural" 1 "Urban"
label values urban urbanrural

la var hhsize_adeq "HH size (adult equiv)"
la var hh_depshare "HH dep ratio (share)"
la var educ_yrs_max "Max educ by hh member (yrs)"
la var educ_yrs_head "Educ of hh head (yrs)"

la var pcmh_farm "HH participates farming" /*hh participates in farming at least 35 hrs pr year*/
la var pcmh_ent "HH participates non-farm self-employment" /*hh participates in non-farm self employment at least 35 hrs pr year*/
la var pcmh_market "HH participates wage labor" /*hh participates in wage employment at least 35 hrs pr year*/


forvalues i = 1/3 {
	g r`i' = round==`i'
	}
	
* round dummies
la var r1 "Round 1"
la var r2 "Round 2"
la var r3 "Round 3"

forvalues r=1/3 {
	display "Round `r': "
	qui: count if w_food == 0 & round==`r'
	display "No food: `r(N)'"
	qui: count if w_good == 0 & round==`r'
	display "No good: `r(N)'"
	qui: count if w_service == 0 & round==`r'
	display "No service: `r(N)'"
	qui: count if w_food1==0 & w_food2==0 & w_food3==0 & w_food4==0 & w_good == 0 & w_service == 0 & round==`r'
	display "No consumption at all: `r(N)'"
	}


* macro region dummies
forvalues i = 1/8 {
	g admin0`i' = `admin0'==`i'
	}

* set as time series
encode `r1_hh', generate(hh_c)
g wt_pan = wt_r1fwd
tsset hh_c round, generic

egen ea_c = group(`admin0' `admin1' `admin2' `admin3' `ea')
bysort `r1_hh': egen eabase_c = median(ea_c) /*rounds 2 and 3 have the same ea coding, this makes it constant for each hh*/

egen admin3_c = group(`admin0' `admin1' `admin2' `admin3')
bysort `r1_hh': egen admin3base_c = median(admin3_c) /*rounds 2 and 3 have the same ea coding, this makes it constant for each hh*/

g wt_cur = `r1_weight' if round==1
	replace wt_cur = `r2_weight' if round==2
	replace wt_cur = `r3_weight' if round==3
	

save "${TEMP}\Tanzania_Panel_r1fwd.dta", replace


** Describe expenditures in detail

import excel using "${LOCDRIVE}\Parameters\GoodServiceCodes.xlsx", clear firstrow sheet("Tanzania_R1")
keep itemcode itemnametab good_cons good_util good_educ good_health service_util service_comms service_recr service_transport service_food service_housing service_educ service_health service_misc
tempfile goodservicecodes_r1
save `goodservicecodes_r1', replace
import excel using "${LOCDRIVE}\Parameters\GoodServiceCodes.xlsx", clear firstrow sheet("Tanzania_R2")
keep itemcode itemnametab good_cons good_util good_educ good_health service_util service_comms service_recr service_transport service_food service_housing service_educ service_health service_misc
tempfile goodservicecodes_r2
save `goodservicecodes_r2', replace
import excel using "${LOCDRIVE}\Parameters\GoodServiceCodes.xlsx", clear firstrow sheet("Tanzania_R3")
keep itemcode itemnametab good_cons good_util good_educ good_health service_util service_comms service_recr service_transport service_food service_housing service_educ service_health service_misc
tempfile goodservicecodes_r3
save `goodservicecodes_r3', replace

foreach good in good_cons good_util service_util service_comms service_recr service_transport service_food service_housing service_misc {
	display "GOOD IS: `good'"
	forvalues r=1/3 { 
		use "${TEMP}\Tanzania_Panel_r1fwd.dta", clear
		keep if round==`r'
		keep `r`r'_hh' round exp_gsfot wt_cur
		tempfile exp_r`r'_tot
		save `exp_r`r'_tot', replace
		
		use "${TEMP}\Tanzania_GoodService_Items_R`r'.dta", clear
		g round=`r'
		merge m:1 itemcode using `goodservicecodes_r`r'', nogen keep(1 3) keepusing(itemnametab `good')
		keep if `good'==1
		
		sort `r`r'_hh' itemnametab
		collapse (sum) exp_`good', by(`r`r'_hh' round itemnametab) /*some items are disaggregated this gets them into the same groups across rounds*/
		levelsof itemnametab, local(itemlist)
		local numitems : word count `itemlist'
		
		tempfile itemsusingdata
		save `itemsusingdata', replace
		
		use `exp_r`r'_tot', clear
		g count=1
		expand `numitems'
		bysort `r`r'_hh' round: gen itemnum=sum(count)
		tab itemnum
		drop count
		g itemnametab = ""
		forvalues i = 1/`numitems' {
			loc itemname `: word `i' of `itemlist''
			display "i: `i'; itemname: `itemname'"
			replace itemnametab = "`itemname'" if itemnum==`i'
			}
		
		tab itemnametab itemnum
		merge 1:1 `r`r'_hh' itemnametab round using `itemsusingdata', nogen /*instances of _m==2 */
			
		recode exp_`good' (.=0) 
		replace exp_`good' = exp_`good'/exp_gsfot
		drop exp_gsfot
		
		collapse (mean) exp_`good' (sd) sd=exp_`good' [aw=wt_cur], by(itemnametab round)
		
		tempfile detailgs_`good'_r`r'
		save `detailgs_`good'_r`r'', replace
		}

	use `detailgs_`good'_r1', clear
	append using `detailgs_`good'_r2'
	append using `detailgs_`good'_r3'
	encode itemname, generate(item_c)

	format exp_`good' sd %9.3f
	rename (exp_`good' sd) (exp_good_r exp_good_sd_r)

	reshape wide exp_good_r exp_good_sd_r, i(itemname item_c) j(round)
	order itemname exp_good_r1 exp_good_sd_r1 exp_good_r2 exp_good_sd_r2 exp_good_r3 exp_good_sd_r3
	
	label variable itemname ""
	forvalues r=1/3 {
		label variable exp_good_r`r' "Budget share (R`r')"
		label variable exp_good_sd_r`r' "(SD)"
		}

	drop if item_c == .
	drop if exp_good_r1==. | exp_good_r2==. | exp_good_r3==.
	
	tempfile exp_`good'_item
	save `exp_`good'_item', replace
	}


use `exp_good_cons_item', clear
append using `exp_good_util_item'
texsave itemname exp_good_r1 exp_good_sd_r1 exp_good_r2 exp_good_sd_r2 exp_good_r3 exp_good_sd_r3 using "${TEMP}\Summary_Tanzania_consumer_good_items.tex", varlabels replace frag title("Average budget share of each detailed good item, by survey round.") marker(Tab:summarystats_wd_consgood)
filefilter "${TEMP}\Summary_Tanzania_consumer_good_items.tex" "${FIG}\Summary_Tanzania_consumer_good_items.tex", from("lCCCCCC") to("p{6cm}CCCCCC") replace

use `exp_service_util_item', clear
append using `exp_service_comms_item'
append using `exp_service_recr_item'
append using `exp_service_transport_item'
append using `exp_service_food_item'
append using `exp_service_housing_item'
append using `exp_service_misc_item'
texsave itemname exp_good_r1 exp_good_sd_r1 exp_good_r2 exp_good_sd_r2 exp_good_r3 exp_good_sd_r3 using "${TEMP}\Summary_Tanzania_consumer_service_items.tex", varlabels replace frag title("Average budget share of each consumer service item, by survey round.") marker(Tab:summarystats_wd_consservice)
filefilter "${TEMP}\Summary_Tanzania_consumer_service_items.tex" "${FIG}\Summary_Tanzania_consumer_service_items.tex", from("lCCCCCC") to("p{6cm}CCCCCC") replace

